from pyspark import SparkContext, SparkConf

# Start a local Spark application and distribute a small sample dataset.
sc = SparkContext(conf=SparkConf().setAppName("filter").setMaster("local"))

rdd = sc.parallelize([1, 2, 3, 4, 5])

# filter() returns a new RDD that keeps only the elements for which the
# predicate is true; is_even is that predicate.
def is_even(x):
    """Return True if *x* is even, False otherwise."""
    return not x % 2

# Transformation is lazy: nothing runs until an action is called.
filtered_rdd = rdd.filter(is_even)

# Equivalent inline form:
#   filtered_rdd = rdd.filter(lambda x: x % 2 == 0)

# collect() is the action that triggers evaluation and returns the
# results to the driver as a plain Python list.
print(filtered_rdd.collect())  # [2, 4]

# Shut down the SparkContext and release its resources.
sc.stop()